knitr::opts_chunk$set(echo = TRUE,dpi = 400)

Modelling soil pollution in the Nederlands

## Loading required package: nlme
## Warning: package 'nlme' was built under R version 3.6.2
## This is mgcv 1.8-31. For overview type 'help("mgcv-package")'.
## 'data.frame':    155 obs. of  14 variables:
##  $ x      : num  181072 181025 181165 181298 181307 ...
##  $ y      : num  333611 333558 333537 333484 333330 ...
##  $ cadmium: num  11.7 8.6 6.5 2.6 2.8 3 3.2 2.8 2.4 1.6 ...
##  $ copper : num  85 81 68 81 48 61 31 29 37 24 ...
##  $ lead   : num  299 277 199 116 117 137 132 150 133 80 ...
##  $ zinc   : num  1022 1141 640 257 269 ...
##  $ elev   : num  7.91 6.98 7.8 7.66 7.48 ...
##  $ dist   : num  0.00136 0.01222 0.10303 0.19009 0.27709 ...
##  $ om     : num  13.6 14 13 8 8.7 7.8 9.2 9.5 10.6 6.3 ...
##  $ ffreq  : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ soil   : Factor w/ 3 levels "1","2","3": 1 1 1 2 2 2 2 1 1 2 ...
##  $ lime   : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 1 1 1 1 ...
##  $ landuse: Factor w/ 15 levels "Aa","Ab","Ag",..: 4 4 4 11 4 11 4 2 2 15 ...
##  $ dist.m : num  50 30 150 270 380 470 240 120 240 420 ...
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cadmium ~ s(x, y)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.2458     0.1774    18.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##          edf Ref.df     F p-value    
## s(x,y) 23.48  27.24 8.667  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.607   Deviance explained = 66.7%
## -REML = 372.07  Scale est. = 4.8757    n = 155
## (Intercept)    s(x,y).1    s(x,y).2    s(x,y).3    s(x,y).4    s(x,y).5 
##   3.2458065   0.8686658 -10.2154908   6.4161781  -2.6784725   9.1807111 
##    s(x,y).6    s(x,y).7    s(x,y).8    s(x,y).9   s(x,y).10   s(x,y).11 
##   3.7004932 -10.4780937  -8.9821840  -0.6218677  -4.6789789  -5.4267313 
##   s(x,y).12   s(x,y).13   s(x,y).14   s(x,y).15   s(x,y).16   s(x,y).17 
##   7.4996452   8.1962843  -7.6311640   4.5829340  -0.9734724   0.7634059 
##   s(x,y).18   s(x,y).19   s(x,y).20   s(x,y).21   s(x,y).22   s(x,y).23 
##   8.8112827  -4.8639552  -6.8085148   3.8059356   6.3499868   6.4701169 
##   s(x,y).24   s(x,y).25   s(x,y).26   s(x,y).27   s(x,y).28   s(x,y).29 
##  -8.1556061   7.2050985   0.1567317 -53.4384704  -4.2860149   5.5212533

Adding more variables to predict soil pollution

## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cadmium ~ s(x, y) + s(elev) + s(dist)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.2458     0.1238   26.21   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##            edf Ref.df      F  p-value    
## s(x,y)  20.398 24.599  2.324  0.00078 ***
## s(elev)  1.282  1.496 28.868 6.52e-08 ***
## s(dist)  6.609  7.698 13.677 5.25e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.809   Deviance explained = 84.4%
## -REML = 321.06  Scale est. = 2.3762    n = 155

Plotting the model surface

Customizing 3D plots

Extrapolation in surface plots

Soil pollution in different land uses

## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## copper ~ s(dist, by = landuse) + landuse
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   20.115     46.336   0.434    0.665
## landuseAb     15.674     46.608   0.336    0.737
## landuseAg      6.670     46.825   0.142    0.887
## landuseAh     18.137     46.396   0.391    0.697
## landuseAm     13.364     46.439   0.288    0.774
## landuseB       0.825     62.980   0.013    0.990
## landuseBw     18.909     46.756   0.404    0.687
## landuseDEN     0.000      0.000      NA       NA
## landuseFh      4.885    155.441   0.031    0.975
## landuseFw     18.578     46.572   0.399    0.691
## landuseGa    122.541     99.225   1.235    0.219
## landuseSPO     1.885    146.521   0.013    0.990
## landuseSTA     3.261     49.282   0.066    0.947
## landuseTv      4.885    134.312   0.036    0.971
## landuseW      18.917     46.442   0.407    0.684
## 
## Approximate significance of smooth terms:
##                           edf     Ref.df      F  p-value    
## s(dist):landuseAa   1.000e+00  1.000e+00  0.000  0.98367    
## s(dist):landuseAb   1.000e+00  1.000e+00  1.672  0.19842    
## s(dist):landuseAg   1.000e+00  1.000e+00  0.442  0.50767    
## s(dist):landuseAh   2.524e+00  3.113e+00  8.725 2.11e-05 ***
## s(dist):landuseAm   1.000e+00  1.000e+00  6.802  0.01022 *  
## s(dist):landuseB    1.000e+00  1.000e+00  0.023  0.87889    
## s(dist):landuseBw   1.000e+00  1.000e+00  0.047  0.82886    
## s(dist):landuseDEN  1.000e+00  1.000e+00  0.042  0.83865    
## s(dist):landuseFh  -6.730e-16 -6.730e-16  0.000  1.00000    
## s(dist):landuseFw   2.696e+00  3.304e+00  5.292  0.00131 ** 
## s(dist):landuseGa   1.882e+00  1.986e+00  0.654  0.49474    
## s(dist):landuseSPO -3.189e-16 -3.189e-16  0.000  1.00000    
## s(dist):landuseSTA  1.000e+00  1.000e+00  0.005  0.94455    
## s(dist):landuseTv  -1.713e-15 -1.713e-15  0.000  1.00000    
## s(dist):landuseW    4.009e+00  4.814e+00 32.861  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 146/150
## R-sq.(adj) =  0.634   Deviance explained = 71.1%
## -REML = 532.81  Scale est. = 199.48    n = 154
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## copper ~ s(dist, landuse, bs = "fs")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    30.07       3.33   9.031 1.43e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                   edf Ref.df     F p-value    
## s(dist,landuse) 16.37     71 2.463  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.533   Deviance explained = 58.3%
## -REML = 659.94  Scale est. = 254.2     n = 154

Plotting pollution in different land uses

Polution models with multi-scale interactions

## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cadmium ~ te(x, y, elev)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.2458     0.1329   24.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df     F p-value    
## te(x,y,elev) 38.29  45.86 11.87  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.78   Deviance explained = 83.4%
## -REML = 318.09  Scale est. = 2.7358    n = 155

## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cadmium ~ s(x, y) + s(elev) + ti(x, y, elev)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.7044     0.2244   12.05   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df     F  p-value    
## s(x,y)       21.812 25.491 6.386 7.29e-15 ***
## s(elev)       3.898  4.688 9.680 1.98e-07 ***
## ti(x,y,elev) 14.656 19.180 2.706 0.000516 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.793   Deviance explained = 84.7%
## -REML = 336.62  Scale est. = 2.5755    n = 155